# -*- coding: utf-8 -*-

from scrapy.spiders import Spider
from scrapyDemo.items import csdnItem
import re


class CSDNSpider(Spider):
    """Spider that scrapes post titles and links from a CSDN blog listing.

    The crawl starts from the single URL in ``start_urls`` (the blog's
    ``viewmode=contents`` page) and hands the response to :meth:`parse`.
    """

    name = 'csdn'
    start_urls = ["http://blog.csdn.net/hzp666?viewmode=contents"]

    def parse(self, response):
        """Build one ``csdnItem`` per ``<span class="link_title">`` node.

        For every matching node the anchor's ``href`` values and text nodes
        are extracted. The text fragments are concatenated and stripped of
        all whitespace to form the title, which is also printed to stdout.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Returns:
            A list of ``csdnItem`` objects. Each has ``'url'`` set to the
            list of extracted ``href`` strings and ``'title'`` set to the
            whitespace-free title string.
        """
        collected = []
        title_nodes = response.xpath('//span[@class="link_title"]')

        for node in title_nodes:
            # NOTE(review): assumes csdnItem declares 'url' and 'title'
            # fields -- confirm against scrapyDemo.items.
            entry = csdnItem()
            text_parts = node.xpath('a/text()').extract()
            hrefs = node.xpath('a/@href').extract()

            # The url field keeps the raw extracted list, not a single string.
            entry['url'] = hrefs

            # The extracted title is a list of fragments: merge them into one
            # string and remove every whitespace character.
            joined_title = ''.join(text_parts)
            cleaned_title = re.sub(r'\s+', '', joined_title)
            print(cleaned_title)

            entry['title'] = cleaned_title
            collected.append(entry)

        return collected
